/*
 * This file is part of the Sofia-SIP package
 *
 * Copyright (C) 2005 Nokia Corporation.
 *
 * Contact: Pekka Pessi <pekka.pessi@nokia.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 */

/**@file utf8.h
 * Encoding/Decoding Functions for UCS Transformation Format UTF-8.
 *
 * UTF-8 encoding codes the ISO 10646 (Unicode, UCS2 and UCS4) characters as
 * variable length (1 - 6 bytes) strings of 8-bit characters.
 *
 * @author Pekka Pessi <pekka.pessi@nokia.com>
 *
 * @date Created: Tue Apr 21 15:32:38 1998 pessi

 * @sa <a href="ftp://ftp.ietf.org/rfc/rfc2279.txt">RFC 2279</a>,
 * <i>"UTF-8, a transformation format of ISO 10646"</i>,
 * F. Yergeau. January 1998.
 *
 */

#ifndef UTF8_H /** Defined when <utf8.h> has been included */
#define UTF8_H

#include "Common.h"

/*
#ifndef SU_TYPES_H
#include "su_types.h"
#endif
*/

// SOFIA_BEGIN_DECLS

typedef unsigned char utf8;
typedef unsigned short utf16;
typedef unsigned char ucs1;
typedef unsigned short ucs2;
typedef unsigned int ucs4;

size_t utf8_width(const utf8 *);

/* Latin-1 encoding/decoding */
size_t ucs18decode(char *dst, size_t dst_size, const utf8 *s);
size_t ucs1encode(utf8 *dst, const ucs1 *s, size_t n, const char quote[128]);
size_t ucs1declen(const utf8 *s);
size_t ucs1enclen(const ucs1 *s, size_t n, const char quote[128]);

/* UCS2 (BMP) encoding/decoding */
size_t ucs2decode(ucs2 *dst, size_t dst_size, const utf8 *s);
size_t ucs2encode(utf8 *dst, const ucs2 *s, size_t n, const char quote[128]);
size_t ucs2declen(const utf8 *s);
size_t ucs2enclen(const ucs2 *s, size_t n, const char quote[128]);

size_t ucs4decode(ucs4 *dst, size_t dst_size, const utf8 *s);
size_t ucs4encode(utf8 *dst, const ucs4 *s, size_t n, const char quote[128]);
size_t ucs4declen(const utf8 *s);
size_t ucs4enclen(const ucs4 *s, size_t n, const char quote[128]);

size_t ucs2len(ucs2 const *s);
int ucs2cmp(ucs2 const *s1, ucs2 const *s2);
int ucs2ncmp(ucs2 const *s1, ucs2 const *s2, size_t n);

size_t ucs4len(ucs4 const *s);
int ucs4cmp(ucs4 const *s1, ucs4 const *s2);
int ucs4ncmp(ucs4 const *s1, ucs4 const *s2, size_t n);

/*
 * IS_UCS4_n tests whether UCS4 character should be represented
 * with 'n' byte utf8 string
 */
#define IS_UCS4_1(x) ((ucs4)(x) <= 0x7fu)
#define IS_UCS4_2(x) (0x80u <= (ucs4)(x) && (ucs4)(x) <= 0x7ffu)
#define IS_UCS4_3(x) (0x800u <= (ucs4)(x) && (ucs4)(x) <= 0xffffu)
#define IS_UCS4_4(x) (0x10000u <= (ucs4)(x) && (ucs4)(x) <= 0x1fFFFFu)
#define IS_UCS4_5(x) (0x200000u <= (ucs4)(x) && (ucs4)(x) <= 0x3ffFFFFu)
#define IS_UCS4_6(x) (0x4000000u <= (ucs4)(x) && (ucs4)(x) <= 0x7fffFFFFu)

/* Special test for ISO-8859-1 characters */
#define IS_UCS4_I(x) (0x80u <= (ucs4)(x) && (ucs4)(x) <= 0xffu)

/* Length of an UCS4 character in UTF8 encoding */
#define UTF8_LEN4(x)                                                           \
    (IS_UCS4_1(x) || IS_UCS4_2(x) && 2 || IS_UCS4_3(x) && 3 ||                 \
     IS_UCS4_4(x) && 4 || IS_UCS4_5(x) && 5 || IS_UCS4_6(x) && 6)

/* Length of an UCS2 character in UTF8 encoding */
#define UTF8_LEN2(x) (IS_UCS4_1(x) || IS_UCS4_2(x) && 2 || IS_UCS4_3(x) && 3)

/*
 * IS_UTF8_n tests the length of the next wide character
 */
#define IS_UTF8_1(c) (0x00 == ((c)&0x80))
#define IS_UTF8_2(c) (0xc0 == ((c)&0xe0))
#define IS_UTF8_3(c) (0xe0 == ((c)&0xf0))
#define IS_UTF8_4(c) (0xf0 == ((c)&0xf8))
#define IS_UTF8_5(c) (0xf8 == ((c)&0xfc))
#define IS_UTF8_6(c) (0xfc == ((c)&0xfe))

/* Extension byte? */
#define IS_UTF8_X(c) (0x80 == ((c)&0xc0))
/* ISO-8859-1 character? */
#define IS_UTF8_I(c) (0xc0 == ((c)&0xfc))

#define IS_UTF8_S1(s) (IS_UTF8_1(s[0]))
#define IS_UTF8_S2(s) (IS_UTF8_2(s[0]) && ((s)[1] & 192) == 128)
#define IS_UTF8_SI(s) (IS_UTF8_I(s[0]) && ((s)[1] & 192) == 128)
#define IS_UTF8_S3(s)                                                          \
    (IS_UTF8_3(s[0]) && ((s)[1] & 192) == 128 && ((s)[2] & 192) == 128)
#define IS_UTF8_S4(s)                                                          \
    (IS_UTF8_4(s[0]) && ((s)[1] & 192) == 128 && ((s)[2] & 192) == 128 &&      \
     ((s)[3] & 192) == 128)
#define IS_UTF8_S5(s)                                                          \
    (IS_UTF8_5(s[0]) && ((s)[1] & 192) == 128 && ((s)[2] & 192) == 128 &&      \
     ((s)[3] & 192) == 128 && ((s)[4] & 192) == 128)
#define IS_UTF8_S6(s)                                                          \
    (IS_UTF8_6(s[0]) && ((s)[1] & 192) == 128 && ((s)[2] & 192) == 128 &&      \
     ((s)[3] & 192) == 128 && ((s)[4] & 192) == 128 && ((s)[5] & 192) == 128)

#define UCS4_S1(s) ((ucs4)(s[0]))
#define UCS4_S2(s) ((ucs4)(((s[0]) & 31) << 6) | ((s[1]) & 63))
#define UCS4_S3(s)                                                             \
    ((ucs4)(((s[0]) & 15) << 12) | (((s[1]) & 63) << 6) | ((s[2]) & 63))
#define UCS4_S4(s)                                                             \
    ((ucs4)(((s[0]) & 7) << 18) | (((s[1]) & 63) << 12) |                      \
     (((s[2]) & 63) << 6) | ((s[3]) & 63))
#define UCS4_S5(s)                                                             \
    ((ucs4)(((s[0]) & 3) << 24) | (((s[1]) & 63) << 18) |                      \
     (((s[2]) & 63) << 12) | (((s[3]) & 63) << 6) | ((s[4]) & 63))
#define UCS4_S6(s)                                                             \
    ((ucs4)(((s[0]) & 1) << 30) | (((s[1]) & 63) << 24) |                      \
     (((s[2]) & 63) << 18) | (((s[3]) & 63) << 12) | (((s[4]) & 63) << 6) |    \
     ((s[5]) & 63))

#define UTF8_S1(s, c) ((s)[0] = (c))
#define UTF8_S2(s, c)                                                          \
    ((s)[0] = (((c) >> 6) & 31) | 0xc0, (s)[1] = ((c)&63) | 128)
#define UTF8_S3(s, c)                                                          \
    ((s)[0] = (((c) >> 12) & 15) | 0xe0, (s)[1] = ((c >> 6) & 63) | 128,       \
     (s)[2] = ((c)&63) | 128)
#define UTF8_S4(s, c)                                                          \
    ((s)[0] = (((c) >> 18) & 7) | 0xf0, (s)[1] = ((c >> 12) & 63) | 128,       \
     (s)[2] = ((c >> 6) & 63) | 128, (s)[3] = ((c)&63) | 128)
#define UTF8_S5(s, c)                                                          \
    ((s)[0] = (((c) >> 24) & 3) | 0xf8, (s)[1] = ((c >> 18) & 63) | 128,       \
     (s)[2] = ((c >> 12) & 63) | 128, (s)[3] = ((c >> 6) & 63) | 128,          \
     (s)[4] = ((c)&63) | 128)
#define UTF8_S6(s, c)                                                          \
    ((s)[0] = (((c) >> 30) & 1) | 0xfc, (s)[1] = ((c >> 24) & 63) | 128,       \
     (s)[2] = ((c >> 18) & 63) | 128, (s)[3] = ((c >> 12) & 63) | 128,         \
     (s)[4] = ((c >> 6) & 63) | 128, (s)[5] = ((c)&63) | 128)

// SOFIA_END_DECLS

#endif /* UTF8_H */
